Load data and packages

library(dplyr)
# Read in the data
# stu_data <- read.csv("data/sim_assessment_data.csv", stringsAsFactors = FALSE)
load("data/cohort_test_scores.rda")
head(sch_score, addrownums = FALSE)
load("data/agency.rda")
load("data/attend.rda")
load("data/expel.rda")
load("data/enrollment.rda")
load("data/staff.rda")
ls()
[1] "agency"      "attend"      "expel"       "full_enroll" "sch_score"  
[6] "staff"      

join data

full_data <- inner_join(sch_score, agency, 
                        by = c("distid", "schid", "test_year"))
full_data <- inner_join(full_data, attend, 
                        by = c("distid", "schid", "test_year", "school_year"))
full_data <- inner_join(full_data, expel, 
                        by = c("distid", "schid", "test_year", "school_year"))
full_data <- inner_join(full_data %>% dplyr::select(-enrollment), full_enroll, 
                        by = c("distid", "schid", "test_year", "school_year"))
full_data <- inner_join(full_data, staff, 
                        by = c("distid", "schid", "test_year", "school_year"))
str(full_data)
'data.frame':   15087 obs. of  69 variables:
 $ distid                    : num  275 222 288 287 174 290 148 88 174 174 ...
 $ schid                     : int  9 1 11 10 120 3 2 1 129 64 ...
 $ test_year                 : num  2007 2010 2007 2011 2011 ...
 $ grade                     : num  7 4 4 4 4 7 5 8 4 5 ...
 $ subject                   : chr  "read" "read" "math" "math" ...
 $ n1                        : num  195 107 39 12 42 89 72 111 10 20 ...
 $ ss1                       : num  1213 1115 966 999 931 ...
 $ n2                        : num  199 109 41 12 33 75 76 108 21 36 ...
 $ ss2                       : num  1233 1166 1063 1071 1015 ...
 $ school_year               : chr  "2006-07" "2009-10" "2006-07" "2010-11" ...
 $ cesa                      : int  1 2 9 4 1 3 10 1 1 1 ...
 $ county                    : int  40 13 37 32 40 25 9 40 40 40 ...
 $ athl_conf                 : int  47 158 46 28 27 42 4 47 27 27 ...
 $ locale                    : int  3 4 2 2 1 4 4 3 1 1 ...
 $ locale_desc               : chr  "Urban Fringe of a Large City" "Urban Fringe of a Mid-Size City" "Mid-Size City" "Mid-Size City" ...
 $ agency_type               : chr  "Public school" "Public school" "Public school" "Public school" ...
 $ grade_group               : chr  "Middle School" "Elementary School" "Elementary School" "Elementary School" ...
 $ low_grade                 : chr  "06" "03" "KG" "K4" ...
 $ high_grade                : chr  "08" "05" "05" "08" ...
 $ school_size               : chr  "Large" "Medium" "Medium" "Small" ...
 $ charter_ind               : chr  "No" "No" "No" "Yes" ...
 $ title1a_program           : chr  "TASNF" "TAS" "TAS" "SWP" ...
 $ title1a_program_desc      : chr  "Targeted Assistance eligible but not funded" "Targeted Assistance eligible and funded" "Targeted Assistance eligible and funded" "School-wide Program" ...
 $ days_possible             : num  107260 76496 53705 21606 72782 ...
 $ days_attended             : num  103245 73609 51537 20568 67523 ...
 $ att_rate                  : num  96.3 96.2 96 95.2 92.8 ...
 $ total_enroll              : int  614 445 310 133 418 268 505 287 349 353 ...
 $ suspension_rate           : num  10.9 NA 2.9 NA 14.3 ...
 $ suspension_count          : num  67 0 9 0 60 0 2 4 204 102 ...
 $ susp_rate_female          : num  4.12 NA 0.6 NA 10.81 ...
 $ susp_rate_male            : num  17.03 NA 5.59 NA 17.17 ...
 $ susp_count_female         : num  12 0 1 0 20 0 1 1 70 25 ...
 $ susp_count_male           : num  55 0 8 0 40 0 1 3 134 77 ...
 $ amer_ind_enroll           : int  3 2 1 1 1 1 6 5 2 2 ...
 $ asian_enroll              : int  42 13 52 9 132 2 7 18 2 16 ...
 $ black_enroll              : int  104 13 15 0 232 5 14 59 336 284 ...
 $ hispanic_enroll           : int  28 16 9 5 24 4 8 9 0 11 ...
 $ mutli_enroll              : int  0 0 0 11 0 0 0 0 0 0 ...
 $ pac_island_enroll         : int  0 0 0 0 0 0 0 0 0 0 ...
 $ white_enroll              : int  437 401 233 107 29 256 470 196 9 40 ...
 $ amer_ind_susp             : num  NA NA NA NA NA NA 0 NA NA NA ...
 $ asian_susp                : num  1 NA 1 0 1 NA 0 0 NA 0 ...
 $ black_susp                : num  25 NA 3 0 55 NA 0 4 198 92 ...
 $ hispanic_susp             : num  NA 0 NA NA NA NA 0 NA 0 NA ...
 $ multi_susp                : num  0 0 0 0 0 0 0 0 0 0 ...
 $ pac_island_susp           : num  0 0 0 0 0 0 0 0 0 0 ...
 $ white_susp                : num  35 0 4 0 3 0 2 0 NA 7 ...
 $ enrollment                : num  612 445 311 133 418 268 505 287 349 353 ...
 $ frl_per                   : num  17.6 16.6 45.7 36.1 82.1 ...
 $ non_frl_per               : num  82.3 83.4 54.3 63.9 17.9 ...
 $ frl_count                 : num  108 74 142 48 343 71 282 29 328 296 ...
 $ non_frl_count             : num  504 371 169 85 75 197 223 258 21 57 ...
 $ swd_per                   : num  9.64 14.38 7.72 4.51 14.11 ...
 $ non_swd_per               : num  90.4 85.6 92.3 95.5 85.9 ...
 $ swd_count                 : num  59 64 24 6 59 49 79 34 57 69 ...
 $ non_swd_count             : num  553 381 287 127 359 219 426 253 292 284 ...
 $ ell_per                   : num  5.23 2.7 19.94 NA 20.81 ...
 $ non_ell_per               : num  94.8 97.3 80.1 100 79.2 ...
 $ ell_count                 : num  32 12 62 NA 87 2 2 26 NA 2 ...
 $ non_ell_count             : num  580 433 249 133 331 266 503 261 349 351 ...
 $ total_enrollment          : int  614 445 310 133 418 268 505 287 349 353 ...
 $ admin_fte                 : num  2 1 0.8 0.33 1 1 1 0.7 2 1.5 ...
 $ ratio_stu_to_admin        : num  307 445 388 403 418 ...
 $ support_staff_fte         : num  14.51 13.93 15.78 0.79 9.95 ...
 $ ratio_stu_to_supstaff     : num  42.3 31.9 19.6 168.3 42 ...
 $ licensed_fte              : num  47.9 38.71 28.67 9.43 30.48 ...
 $ ratio_stu_to_licensed     : num  12.8 11.5 10.8 14.1 13.7 ...
 $ total_staff               : num  64.4 53.6 45.2 10.6 41.4 ...
 $ ratio_students_to_allstaff: num  9.53 8.3 6.85 12.61 10.09 ...

clean up workspace

rm(agency, attend, sch_score, expel, staff, full_enroll)

count rows proposed models

full_data %>% dplyr::select(test_year, grade, subject) %>% 
  distinct %>% nrow
[1] 50

rows per model

full_data %>% dplyr::select(test_year, grade, subject) %>% 
  group_by(test_year, grade, subject) %>%
  summarize(count = n()) %>% 
  pull(count) %>% summary
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  152.0   220.5   253.0   301.7   425.8   469.0 

loop regression model to 50 subsets of data

# Load the packages we need to fit multiple models
library(tidyverse)
── Attaching packages ──────────────────────────────────────── tidyverse 1.2.1 ──
✔ ggplot2 2.2.1     ✔ readr   1.1.1
✔ tibble  1.4.2     ✔ purrr   0.2.4
✔ tidyr   0.8.0     ✔ stringr 1.2.0
✔ ggplot2 2.2.1     ✔ forcats 0.3.0
package ‘tibble’ was built under R version 3.4.3package ‘tidyr’ was built under R version 3.4.3package ‘forcats’ was built under R version 3.4.3── Conflicts ─────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
library(broom) # to manipulate regression models
package ‘broom’ was built under R version 3.4.4
library(modelr) # to fit multiple models and get their attributes easily

Attaching package: ‘modelr’

The following object is masked from ‘package:broom’:

    bootstrap
# Define a grouped dataframe using only the columns we need
# group by the grade, subject, and year
# Then nest so the data hangs together in one list
by_group <- full_data[, 1:10] %>%
  group_by(grade, subject, test_year) %>%
  nest()
# Define a simple function that fits our model
simple_model <- function(df) {
  lm(ss2 ~ ss1, data = df)
}
# Apply that model to each group in our dataset using the `map` function
# This is equivalent to a loop, but more efficient to write
by_group <- by_group %>%
  mutate(model = map(data, simple_model))
# To understand our model(s) - use the `glance` function to compute some 
# summary statistics for each model
glance <- by_group %>%
  mutate(glance = map(model, broom::glance)) %>%
  unnest(glance, .drop = TRUE)
# Display the output
arrange(glance, desc(r.squared))

create a group data frame

by_group <- full_data[, 1:10] %>%
  group_by(grade, subject, test_year) %>%
  nest()
head(by_group)

fit model to each group in df

simple_model <- function(df) {
  # df is a user specified parameter to the function
  # ss2 and ss1 are variables that must be present in the df provided by the user
  lm(ss2 ~ ss1, data = df)
}
head(by_group)
by_group <- by_group %>%
  # take each element of the data column, and pass it individually as the 
  # first argument to the simple_model function
  mutate(model = map(data, simple_model))
head(by_group)

add column for model and fit to each group

simple_model <- function(df) {
  # df is a user specified parameter to the function
  # ss2 and ss1 are variables that must be present in the df provided by the user
  lm(ss2 ~ ss1, data = df)
}
head(by_group)
by_group <- by_group %>%
  # take each element of the data column, and pass it individually as the 
  # first argument to the simple_model function
  mutate(model = map(data, simple_model))
head(by_group)

add comlumn for extracted coefficients from model

by_group <- inner_join(
  by_group, # our original data
   by_group %>%
      mutate(coefs = map(model, coef)) %>%  # get the coefficients out of the model
      group_by(grade, subject, test_year) %>% # group the data 
      do(data.frame(t(unlist(.$coefs)))) # split each coefficient into a new column
)
Joining, by = c("grade", "subject", "test_year")
names(by_group)
[1] "grade"        "subject"      "test_year"    "data"         "model"       
[6] "X.Intercept." "ss1"         
# Replace the 6th elements name
names(by_group)[6] <- "est_intercept"
# Replace the 7th elements name
names(by_group)[7] <- "est_ss1"

plot all 50 estimates on graph

ggplot(by_group) + scale_x_continuous(limits = c(700, 1600)) +
  scale_y_continuous(limits = c(700, 1600)) + 
  geom_abline(data = by_group, 
              aes(slope = est_ss1, intercept = est_intercept), 
              alpha = I(0.5)) + theme_bw()

What we learned from previous diagnostic analyses

In our previous module we determined that a one-size-fits-all approach led to a model that

mostly identified schools in lower grades, and most often identified schools

with small class sizes.

Residuals of 50 models showed misspecification

Outliers and high leverage points are influencing data. We know when looking at math compared to reading, math scores have more outliers that are pulling the slope of ss1 down.

Possibly add subject as variable

Possibly test other variables, like grade level, to find outliers

Also learned that outliers exist in context of grade and locale. These also might be good variables to include in model. Also may want to test some more using DFITS and Cooks.

New diagnostics/model comparisions

TESTING FOR OUTLIERS

Look at high leverage points for variables- both to include in 50 models and maybe to include in global model

create model and augmented scores

library(broom) # R package for working with regression models smoothly
# Create a new dataset with augmented values
m1 <- lm(ss2 ~ ss1 + test_year, data = full_data)
sch_score_m1 <- augment(m1) # generate the leverages and more for each row in data
names(sch_score_m1)
 [1] "ss2"        "ss1"        "test_year"  ".fitted"    ".se.fit"    ".resid"    
 [7] ".hat"       ".sigma"     ".cooksd"    ".std.resid"

generate influence scores

m1_inf <- influence.measures(m1)
head(m1_inf$infmat)
         dfb.1_       dfb.ss1      dfb.tst_        dffit    cov.r       cook.d          hat
1 -0.0055337529 -4.665108e-03  0.0055733067 -0.008151389 1.000443 2.214951e-05 0.0002900181
2 -0.0012741071 -7.049256e-05  0.0012756778  0.002160841 1.000292 1.556510e-06 0.0001017740
3  0.0120029041 -1.548576e-02 -0.0118488071  0.021686886 1.000362 1.567718e-04 0.0003979184
4 -0.0051323628 -5.058499e-03  0.0051808896  0.007940750 1.000505 2.101964e-05 0.0003424574
5 -0.0062581027 -9.502183e-03  0.0063484394  0.012028657 1.000687 4.823187e-05 0.0005412427
6  0.0005568328  8.676617e-04 -0.0005642642  0.001287116 1.000376 5.522590e-07 0.0001785945

facet wrap by test year

plotdf <- sch_score_m1
plotdf$dfbeta_ss1 <- m1_inf$infmat[, 2]
ggplot(plotdf, aes(x = ss1, y = dfbeta_ss1)) + 
  geom_point(alpha=I(0.2)) + 
  geom_hline(yintercept = 2/sqrt(nrow(m1$model))) + 
  geom_hline(yintercept = -2/sqrt(nrow(m1$model))) + 
  facet_wrap(~test_year) + 
  geom_rug(alpha=1/4, position = "jitter") +
  theme_bw()

some years seem to exhibit outliers, especially at lower values of ss1. These outliers seem more likely to pull down the slope of ss1. This doesn’t seem as drastic as other variables.

facet wrap by grade level

m1 <- lm(ss2 ~ ss1 + grade, data = full_data)
sch_score_m1 <- augment(m1) # generate the leverages and more for each row in data
names(sch_score_m1)
 [1] "ss2"        "ss1"        "grade"      ".fitted"    ".se.fit"    ".resid"    
 [7] ".hat"       ".sigma"     ".cooksd"    ".std.resid"
m1_inf <- influence.measures(m1)
head(m1_inf$infmat)
         dfb.1_       dfb.ss1      dfb.grad        dffit    cov.r       cook.d          hat
1  0.0029948611 -0.0024907918 -0.0002853156 -0.005628864 1.000313 1.056193e-05 0.0001548384
2 -0.0011996910  0.0016424004 -0.0021515545  0.002486645 1.000458 2.061268e-06 0.0002637846
3  0.0158692584 -0.0133577263  0.0037853084  0.019118899 1.000239 1.218420e-04 0.0002905252
4  0.0037527312 -0.0027937983 -0.0001812284  0.005648069 1.000364 1.063415e-05 0.0001968821
5  0.0100151557 -0.0089359100  0.0038804435  0.010940911 1.000593 3.990312e-05 0.0004466684
6 -0.0006522582  0.0005234677  0.0001473935  0.001365502 1.000342 6.215727e-07 0.0001459713
plotdf <- sch_score_m1
plotdf$dfbeta_ss1 <- m1_inf$infmat[, 2]
ggplot(plotdf, aes(x = ss1, y = dfbeta_ss1)) + 
  geom_point(alpha=I(0.2)) + 
  geom_hline(yintercept = 2/sqrt(nrow(m1$model))) + 
  geom_hline(yintercept = -2/sqrt(nrow(m1$model))) + 
  facet_wrap(~grade) + 
  geom_rug(alpha=1/4, position = "jitter") +
  theme_bw()

lower grade levels seem to be affected by outliers at lower levels of ss1 by pulling the slope down while higer grade levels seem to be affected by outliers at lower levels of ss1 by pulling the slope up

facet wrap by locale

m1 <- lm(ss2 ~ ss1 + locale, data = full_data)
sch_score_m1 <- augment(m1) # generate the leverages and more for each row in data
names(sch_score_m1)
 [1] ".rownames"  "ss2"        "ss1"        "locale"     ".fitted"    ".se.fit"   
 [7] ".resid"     ".hat"       ".sigma"     ".cooksd"    ".std.resid"
m1_inf <- influence.measures(m1)
head(m1_inf$infmat)
         dfb.1_       dfb.ss1      dfb.locl        dffit    cov.r       cook.d          hat
1  0.0033510034 -3.709778e-03  0.0018660197 -0.004795582 1.000363 7.666324e-06 1.849560e-04
2  0.0001768862 -8.971627e-05  0.0002034397  0.001504953 1.000266 7.550108e-07 6.907587e-05
3  0.0173100629 -1.590765e-02 -0.0025779229  0.019893416 1.000220 1.319128e-04 2.920650e-04
4  0.0054028866 -4.825047e-03 -0.0014531137  0.006799427 1.000371 1.541155e-05 2.121335e-04
5  0.0105396391 -9.591768e-03 -0.0029284416  0.012182271 1.000559 4.947142e-05 4.260801e-04
6 -0.0011778973  1.258906e-03 -0.0001999615  0.001718139 1.000348 9.840664e-07 1.486204e-04
plotdf <- sch_score_m1
plotdf$dfbeta_ss1 <- m1_inf$infmat[, 2]
ggplot(plotdf, aes(x = ss1, y = dfbeta_ss1)) + 
  geom_point(alpha=I(0.2)) + 
  geom_hline(yintercept = 2/sqrt(nrow(m1$model))) + 
  geom_hline(yintercept = -2/sqrt(nrow(m1$model))) + 
  facet_wrap(~locale) + 
  geom_rug(alpha=1/4, position = "jitter") +
  theme_bw()

looks like outliers also greatly affect ss1 by locale as well. The outliers are much more likley to appear in locale 1 (large cities) where lower values of ss1 affect the slope of ss1 by pulling it down

facet wrap by school size

m1 <- lm(ss2 ~ ss1 + school_size, data = full_data)
sch_score_m1 <- augment(m1) # generate the leverages and more for each row in data
names(sch_score_m1)
 [1] "ss2"         "ss1"         "school_size" ".fitted"     ".se.fit"     ".resid"     
 [7] ".hat"        ".sigma"      ".cooksd"     ".std.resid" 
m1_inf <- influence.measures(m1)
head(m1_inf$infmat)
         dfb.1_       dfb.ss1      dfb.sc_M      dfb.sc_S        dffit    cov.r       cook.d
1  0.0028501820 -0.0036693401  0.0043644291  2.227578e-03 -0.007648585 1.000437 1.462594e-05
2 -0.0003493112  0.0003517784  0.0016733533 -9.311227e-08  0.002675801 1.000363 1.790089e-06
3  0.0151459689 -0.0152529462  0.0039476843  4.037305e-06  0.019426971 1.000215 9.434997e-05
4  0.0028291398 -0.0028491222 -0.0006159071  7.387195e-03  0.008332229 1.001928 1.735761e-05
5  0.0086038269 -0.0086645965  0.0013901333  2.293433e-06  0.010219651 1.000594 2.611159e-05
6 -0.0018078732  0.0018206424  0.0014869494 -4.819061e-07  0.002571296 1.000482 1.652997e-06
           hat
1 0.0002369927
2 0.0001139771
3 0.0002920269
4 0.0016702124
5 0.0003983572
6 0.0002246230
plotdf <- sch_score_m1
plotdf$dfbeta_ss1 <- m1_inf$infmat[, 2]
ggplot(plotdf, aes(x = ss1, y = dfbeta_ss1)) + 
  geom_point(alpha=I(0.2)) + 
  geom_hline(yintercept = 2/sqrt(nrow(m1$model))) + 
  geom_hline(yintercept = -2/sqrt(nrow(m1$model))) + 
  facet_wrap(~school_size) + 
  geom_rug(alpha=1/4, position = "jitter") +
  theme_bw()

looks like small schools are most subject to outliers (already knew that)

facet wrap by charter

m1 <- lm(ss2 ~ ss1 + charter_ind, data = full_data)
sch_score_m1 <- augment(m1) # generate the leverages and more for each row in data
names(sch_score_m1)
 [1] "ss2"         "ss1"         "charter_ind" ".fitted"     ".se.fit"     ".resid"     
 [7] ".hat"        ".sigma"      ".cooksd"     ".std.resid" 
m1_inf <- influence.measures(m1)
head(m1_inf$infmat)
         dfb.1_       dfb.ss1      dfb.ch_Y        dffit    cov.r       cook.d          hat
1  0.0039305322 -4.228829e-03  0.0007953044 -0.005644070 1.000316 1.061907e-05 1.575692e-04
2  0.0001577928 -2.658701e-05 -0.0003471285  0.001716615 1.000260 9.823182e-07 6.912986e-05
3  0.0168974875 -1.623667e-02 -0.0017253866  0.018692335 1.000234 1.164659e-04 2.815356e-04
4  0.0034793306 -3.489553e-03  0.0116998413  0.012415386 1.001942 5.138371e-05 1.756917e-03
5  0.0092070389 -8.917964e-03 -0.0007521856  0.009824060 1.000543 3.217233e-05 3.927833e-04
6 -0.0009274549  1.002442e-03 -0.0001998413  0.001375589 1.000344 6.307896e-07 1.473811e-04
plotdf <- sch_score_m1
plotdf$dfbeta_ss1 <- m1_inf$infmat[, 2]
ggplot(plotdf, aes(x = ss1, y = dfbeta_ss1)) + 
  geom_point(alpha=I(0.2)) + 
  geom_hline(yintercept = 2/sqrt(nrow(m1$model))) + 
  geom_hline(yintercept = -2/sqrt(nrow(m1$model))) + 
  facet_wrap(~charter_ind) + 
  geom_rug(alpha=1/4, position = "jitter") +
  theme_bw()

charter doesn’t really seem to show that much of an effect

based on these analysis, it seems like to help control for outliers, location and grade level must somehow be controlled for in the model

USING FORMAL MODEL DIAGNOSTICS TO SHOW MODEL IMPROVEMENT

explore and compare residual plots to see if better fit

resiual plot 50 original models

install.packages("cowplot")
trying URL 'https://cran.rstudio.com/bin/macosx/el-capitan/contrib/3.4/cowplot_0.9.3.tgz'
Content type 'application/x-gzip' length 2412869 bytes (2.3 MB)
==================================================
downloaded 2.3 MB

The downloaded binary packages are in
    /var/folders/6w/j5943tln5437dxkgq0cfjcp80000gn/T//RtmpdXYmb2/downloaded_packages
by_group <- by_group %>%
  mutate(
    resids = map2(data, model, add_residuals),
    preds = map2(data, model, add_predictions)
  )
by_group
# unnest expands the data out from a column
plotdf <- left_join(unnest(by_group, resids),
                    unnest(by_group, preds))
Joining, by = c("grade", "subject", "test_year", "est_intercept", "est_ss1", "distid", "schid", "n1", "ss1", "n2", "ss2", "school_year")
# Residual plot
plotdf %>%
  ggplot(aes(x = pred, y = resid)) +
  geom_point(alpha = I(0.2)) + theme_bw() +
  geom_smooth()

m2 residual plot= grade and distid

m2 <- lm(ss2 ~ ss1 + grade + distid, data = full_data)
full_data$predictedm2 <- predict(m2)
full_data$residualsm2 <- residuals(m2)
ggplot(data=full_data, aes(x = predictedm2, y = residualsm2)) +
geom_point(alpha = I(0.2)) + theme_bw() +
geom_smooth()

m3 residual plot= subject and locale

m3 <- lm(ss2 ~ ss1 + subject + locale, na.action=na.exclude, data = full_data)
ggplot(data=full_data, aes(x = predict(m3), y = residuals(m3))) +
geom_point(alpha = I(0.2)) + theme_bw() +
geom_smooth()

m4 residual plot= grade and test year

m4 <- lm(ss2 ~ ss1 + grade + test_year, na.action=na.exclude, data = full_data)
ggplot(data=full_data, aes(x = predict(m4), y = residuals(m4))) +
geom_point(alpha = I(0.2)) + theme_bw() +
geom_smooth()

m5 residual plot= grade and locale

m5 <- lm(ss2 ~ ss1 + grade + locale, na.action=na.exclude, data = full_data)
ggplot(data=full_data, aes(x = predict(m5), y = residuals(m5))) +
geom_point(alpha = I(0.2)) + theme_bw() +
geom_smooth()

m6 residual plot= grade and frl per

m6 <- lm(ss2 ~ ss1 + grade + frl_per, na.action=na.exclude, data = full_data)
ggplot(data=full_data, aes(x = predict(m6), y = residuals(m6))) +
geom_point(alpha = I(0.2)) + theme_bw() +
geom_smooth()

m7 residual plot= enrollment and grade level

m7 <- lm(ss2 ~ ss1 + enrollment + grade, na.action=na.exclude, data = full_data)
ggplot(data=full_data, aes(x = predict(m7), y = residuals(m7))) +
geom_point(alpha = I(0.2)) + theme_bw() +
geom_smooth()

ADD VARIABLES TO 50 NESTED MODEL TO SEE IF BETTER FIT

by_group1 <- full_data[, 1:28] %>%
  group_by(grade, subject, test_year) %>%
  nest()
Warning messages:
1: Unknown or uninitialised column: 'model'. 
2: Unknown or uninitialised column: 'model'. 
3: Unknown or uninitialised column: 'model'. 
head(by_group1)

COMPARE 50 NESTED MODEL APPROACH TO COPLEX GLOBAL MODEL FIT TO ALL THE DATA

try new global models to compare including new variables (these selected bc of previous and above analysis as indicating they may be highly influential variables)

Moving forward with (m2) distid and grade level as added to alt model, based on residual plots and previous analysis

look at model comparison between simple and alt model

# Define a grouped dataframe using only the columns we need (1:10)
# group by the grade, subject, and year
# Then nest so the data hangs together in one list
by_group <- full_data[, 1:10] %>%
  group_by(grade, subject, test_year) %>%
  nest()
# Define a simple function that fits our model
simple_model <- function(df) 
  lm(ss2 ~ ss1, data = full_data)
alt_model <- function(df) 
  lm(ss2 ~ ss1 + distid + grade, data = full_data)
# Apply that model to each group in our dataset using the `map` function
# This is equivalent to a loop, but more efficient to write
by_group <- by_group %>%
  mutate(base_model = map(data, simple_model), 
         full_model = map(data, alt_model))
# Apply a function that makes an F-test of our two models (x and y)
# returns only the statistics from the anova object we want
# in this case, the difference in the sum of squares and the p-value of the 
# F-test
simple_anova <- function(x, y, stat = c("ss", "p"), ...){
  out <- anova(x, y)
  if(stat == "ss"){
    return(out$`Sum of Sq`[[2]])
  } else if(stat == "p"){
    return(out$`Pr(>F)`[[2]])
  }
}
ftests <- by_group %>% rowwise() %>%
    do(ss = unlist(simple_anova(x = .$base_model, y = .$full_model, stat = "ss")), 
      pval = unlist(simple_anova(x = .$base_model, y = .$full_model, stat = "p")))
ftests <- apply(ftests, 2, unlist) # convert to a numeric object
ftests <- as.data.frame(ftests) # make into a data.frame for ease of use
table(ftests$pval < 0.05)

TRUE 
  50 

this is telling me that in all 50 cases, the fuller global model was a much a better fit over the simple, proposed 50 models

therefore, I recommend using the following global model in place of the 50 model approach

# ss2 ~ ss1 + distid + grade

LS0tCnRpdGxlOiAiTUxMLSBSZWcgSUkgSWRwIFByYWN0aWNlLSBSIE5vdGVib29rIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgojTG9hZCBkYXRhIGFuZCBwYWNrYWdlcwoKYGBge3Igc2V0dXAsIG1lc3NhZ2U9RkFMU0UsIHdhcm5pbmc9RkFMU0V9CmxpYnJhcnkoZHBseXIpCiMgUmVhZCBpbiB0aGUgZGF0YQojIHN0dV9kYXRhIDwtIHJlYWQuY3N2KCJkYXRhL3NpbV9hc3Nlc3NtZW50X2RhdGEuY3N2Iiwgc3RyaW5nc0FzRmFjdG9ycyA9IEZBTFNFKQpsb2FkKCJkYXRhL2NvaG9ydF90ZXN0X3Njb3Jlcy5yZGEiKQpoZWFkKHNjaF9zY29yZSwgYWRkcm93bnVtcyA9IEZBTFNFKQpgYGAKCmBgYHtyIGxvYWRkYXRhfQpsb2FkKCJkYXRhL2FnZW5jeS5yZGEiKQpsb2FkKCJkYXRhL2F0dGVuZC5yZGEiKQpsb2FkKCJkYXRhL2V4cGVsLnJkYSIpCmxvYWQoImRhdGEvZW5yb2xsbWVudC5yZGEiKQpsb2FkKCJkYXRhL3N0YWZmLnJkYSIpCmxzKCkKYGBgCiNqb2luIGRhdGEKYGBge3Igam9pbmRhdGF9CmZ1bGxfZGF0YSA8LSBpbm5lcl9qb2luKHNjaF9zY29yZSwgYWdlbmN5LCAKICAgICAgICAgICAgICAgICAgICAgICAgYnkgPSBjKCJkaXN0aWQiLCAic2NoaWQiLCAidGVzdF95ZWFyIikpCmZ1bGxfZGF0YSA8LSBpbm5lcl9qb2luKGZ1bGxfZGF0YSwgYXR0ZW5kLCAKICAgICAgICAgICAgICAgICAgICAgICAgYnkgPSBjKCJkaXN0aWQiLCAic2NoaWQiLCAidGVzdF95ZWFyIiwgInNjaG9vbF95ZWFyIikpCmZ1bGxfZGF0YSA8LSBpbm5lcl9qb2luKGZ1bGxfZGF0YSwgZXhwZWwsIAogICAgICAgICAgICAgICAgICAgICAgICBieSA9IGMoImRpc3RpZCIsICJzY2hpZCIsICJ0ZXN0X3llYXIiLCAic2Nob29sX3llYXIiKSkKZnVsbF9kYXRhIDwtIGlubmVyX2pvaW4oZnVsbF9kYXRhICU+JSBkcGx5cjo6c2VsZWN0KC1lbnJvbGxtZW50KSwgZnVsbF9lbnJvbGwsIAogICAgICAgICAgICAgICAgICAgICAgICBieSA9IGMoImRpc3RpZCIsICJzY2hpZCIsICJ0ZXN0X3llYXIiLCAic2Nob29sX3llYXIiKSkKZnVsbF9kYXRhIDwtIGlubmVyX2pvaW4oZnVsbF9kYXRhLCBzdGFmZiwgCiAgICAgICAgICAgICAgICAgICAgICAgIGJ5ID0gYygiZGlzdGlkIiwgInNjaGlkIiwgInRlc3RfeWVhciIsICJzY2hvb2xfeWVhciIpKQpzdHIoZnVsbF9kYXRhKQpgYGAKI2NsZWFuIHVwIHdvcmtzcGFjZQpgYGB7ciBjbGVhbnVwd29ya3NwYWNlfQpybShhZ2VuY3ksIGF0dGVuZCwgc2NoX3Njb3JlLCBleHBlbCwgc3RhZmYsIGZ1bGxfZW5yb2xsKQpgYGAKCiNjb3VudCByb3dzIHByb3Bvc2VkIG1vZGVscyAKYGBge3IgY291bnRyb3dzfQpmdWxsX2RhdGEgJT4lIGRwbHlyOjpzZWxlY3QodGVzdF95ZWFyLCBncmFkZSwgc3ViamVjdCkgJT4lIAogIGRpc3RpbmN0ICU+JSBucm93CmBgYAoKCiNyb3dzIHBlciBtb2RlbApgYGB7ciBjb3VudHJvd3NwZXJncm91cH0KZnVsbF9kYXRhICU+JSBkcGx5cjo6c2VsZWN0KHRlc3RfeWVhciwgZ3JhZGUsIHN1YmplY3QpICU+JSAKICBncm91cF9ieSh0ZXN0X3llYXIsIGdyYWRlLCBzdWJqZWN0KSAlPiUKICBzdW1tYXJpemUoY291bnQgPSBuKCkpICU+JSAKICBwdWxsKGNvdW50KSAlPiUgc3VtbWFyeQpgYGAKCgojbG9vcCByZWdyZXNzaW9uIG1vZGVsIHRvIDUwIHN1YnNldHMgb2YgZGF0YQpgYGB7ciBmaWZ0eW1vZGVsbG9vcH0KIyBMb2FkIHRoZSBwYWNrYWdlcyB3ZSBuZWVkIHRvIGZpdCBtdWx0aXBsZSBtb2RlbHMKbGlicmFyeSh0aWR5dmVyc2UpCmxpYnJhcnkoYnJvb20pICMgdG8gbWFuaXB1bGF0ZSByZWdyZXNzaW9uIG1vZGVscwpsaWJyYXJ5KG1vZGVscikgIyB0byBmaXQgbXVsdGlwbGUgbW9kZWxzIGFuZCBnZXQgdGhlaXIgYXR0cmlidXRlcyBlYXNpbHkKCiMgRGVmaW5lIGEgZ3JvdXBlZCBkYXRhZnJhbWUgdXNpbmcgb25seSB0aGUgY29sdW1ucyB3ZSBuZWVkCiMgZ3JvdXAgYnkgdGhlIGdyYWRlLCBzdWJqZWN0LCBhbmQgeWVhcgojIFRoZW4gbmVzdCBzbyB0aGUgZGF0YSBoYW5ncyB0b2dldGhlciBpbiBvbmUgbGlzdApieV9ncm91cCA8LSBmdWxsX2RhdGFbLCAxOjEwXSAlPiUKICBncm91cF9ieShncmFkZSwgc3ViamVjdCwgdGVzdF95ZWFyKSAlPiUKICBuZXN0KCkKCiMgRGVmaW5lIGEgc2ltcGxlIGZ1bmN0aW9uIHRoYXQgZml0cyBvdXIgbW9kZWwKc2ltcGxlX21vZGVsIDwtIGZ1bmN0aW9uKGRmKSB7CiAgbG0oc3MyIH4gc3MxLCBkYXRhID0gZGYpCn0KCiMgQXBwbHkgdGhhdCBtb2RlbCB0byBlYWNoIGdyb3VwIGluIG91ciBkYXRhc2V0IHVzaW5nIHRoZSBgbWFwYCBmdW5jdGlvbgojIFRoaXMgaXMgZXF1aXZhbGVudCB0byBhIGxvb3AsIGJ1dCBtb3JlIGVmZmljaWVudCB0byB3cml0ZQpieV9ncm91cCA8LSBieV9ncm91cCAlPiUKICBtdXRhdGUobW9kZWwgPSBtYXAoZGF0YSwgc2ltcGxlX21vZGVsKSkKCiMgVG8gdW5kZXJzdGFuZCBvdXIgbW9kZWwocykgLSB1c2UgdGhlIGBnbGFuY2VgIGZ1bmN0aW9uIHRvIGNvbXB1dGUgc29tZSAKIyBzdW1tYXJ5IHN0YXRpc3RpY3MgZm9yIGVhY2ggbW9kZWwKZ2xhbmNlIDwtIGJ5X2dyb3VwICU+JQogIG11dGF0ZShnbGFuY2UgPSBtYXAobW9kZWwsIGJyb29tOjpnbGFuY2UpKSAlPiUKICB1bm5lc3QoZ2xhbmNlLCAuZHJvcCA9IFRSVUUpCgojIERpc3BsYXkgdGhlIG91dHB1dAphcnJhbmdlKGdsYW5jZSwgZGVzYyhyLnNxdWFyZWQpKQpgYGAKCgojY3JlYXRlIGEgZ3JvdXAgZGF0YSBmcmFtZQpgYGB7ciBjcmVhdGVncm91cGVkZGZ9CmJ5X2dyb3VwIDwtIGZ1bGxfZGF0YVssIDE6MTBdICU+JQogIGdyb3VwX2J5KGdyYWRlLCBzdWJqZWN0LCB0ZXN0X3llYXIpICU+JQogIG5lc3QoKQoKaGVhZChieV9ncm91cCkKYGBgCgojZml0IG1vZGVsIHRvIGVhY2ggZ3JvdXAgaW4gZGYKYGBge3J9CnNpbXBsZV9tb2RlbCA8LSBmdW5jdGlvbihkZikgewogICMgZGYgaXMgYSB1c2VyIHNwZWNpZmllZCBwYXJhbWV0ZXIgdG8gdGhlIGZ1bmN0aW9uCiAgIyBzczIgYW5kIHNzMSBhcmUgdmFyaWFibGVzIHRoYXQgbXVzdCBiZSBwcmVzZW50IGluIHRoZSBkZiBwcm92aWRlZCBieSB0aGUgdXNlcgogIGxtKHNzMiB+IHNzMSwgZGF0YSA9IGRmKQp9CgpoZWFkKGJ5X2dyb3VwKQoKYnlfZ3JvdXAgPC0gYnlfZ3JvdXAgJT4lCiAgIyB0YWtlIGVhY2ggZWxlbWVudCBvZiB0aGUgZGF0YSBjb2x1bW4sIGFuZCBwYXNzIGl0IGluZGl2aWR1YWxseSBhcyB0aGUgCiAgIyBmaXJzdCBhcmd1bWVudCB0byB0aGUgc2ltcGxlX21vZGVsIGZ1bmN0aW9uCiAgbXV0YXRlKG1vZGVsID0gbWFwKGRhdGEsIHNpbXBsZV9tb2RlbCkpCgpoZWFkKGJ5X2dyb3VwKQoKYGBgCgoKI2FkZCBjb2x1bW4gZm9yIG1vZGVsIGFuZCBmaXQgdG8gZWFjaCBncm91cApgYGB7ciBmaXRtb2RlbHRvZWFjaGdyb3VwfQpzaW1wbGVfbW9kZWwgPC0gZnVuY3Rpb24oZGYpIHsKICAjIGRmIGlzIGEgdXNlciBzcGVjaWZpZWQgcGFyYW1ldGVyIHRvIHRoZSBmdW5jdGlvbgogICMgc3MyIGFuZCBzczEgYXJlIHZhcmlhYmxlcyB0aGF0IG11c3QgYmUgcHJlc2VudCBpbiB0aGUgZGYgcHJvdmlkZWQgYnkgdGhlIHVzZXIKICBsbShzczIgfiBzczEsIGRhdGEgPSBkZikKfQoKaGVhZChieV9ncm91cCkKCmJ5X2dyb3VwIDwtIGJ5X2dyb3VwICU+JQogICMgdGFrZSBlYWNoIGVsZW1lbnQgb2YgdGhlIGRhdGEgY29sdW1uLCBhbmQgcGFzcyBpdCBpbmRpdmlkdWFsbHkgYXMgdGhlIAogICMgZmlyc3QgYXJndW1lbnQgdG8gdGhlIHNpbXBsZV9tb2RlbCBmdW5jdGlvbgogIG11dGF0ZShtb2RlbCA9IG1hcChkYXRhLCBzaW1wbGVfbW9kZWwpKQoKaGVhZChieV9ncm91cCkKCmBgYAoKI2FkZCBjb21sdW1uIGZvciBleHRyYWN0ZWQgY29lZmZpY2llbnRzIGZyb20gbW9kZWwKYGBge3IgZ2V0ZXN0aW1hdGVzZnJvbW1vZGVsc30KCmJ5X2dyb3VwIDwtIGlubmVyX2pvaW4oCiAgYnlfZ3JvdXAsICMgb3VyIG9yaWdpbmFsIGRhdGEKICAgYnlfZ3JvdXAgJT4lCiAgICAgIG11dGF0ZShjb2VmcyA9IG1hcChtb2RlbCwgY29lZikpICU+JSAgIyBnZXQgdGhlIGNvZWZmaWNpZW50cyBvdXQgb2YgdGhlIG1vZGVsCiAgICAgIGdyb3VwX2J5KGdyYWRlLCBzdWJqZWN0LCB0ZXN0X3llYXIpICU+JSAjIGdyb3VwIHRoZSBkYXRhIAogICAgICBkbyhkYXRhLmZyYW1lKHQodW5saXN0KC4kY29lZnMpKSkpICMgc3BsaXQgZWFjaCBjb2VmZmljaWVudCBpbnRvIGEgbmV3IGNvbHVtbgopCgpuYW1lcyhieV9ncm91cCkKIyBSZXBsYWNlIHRoZSA2dGggZWxlbWVudHMgbmFtZQpuYW1lcyhieV9ncm91cClbNl0gPC0gImVzdF9pbnRlcmNlcHQiCiMgUmVwbGFjZSB0aGUgN3RoIGVsZW1lbnRzIG5hbWUKbmFtZXMoYnlfZ3JvdXApWzddIDwtICJlc3Rfc3MxIgoKYGBgCgojcGxvdCBhbGwgNTAgZXN0aW1hdGVzIG9uIGdyYXBoCmBgYHtyIHBsb3RmaWZ0eW1vZGVsZXN0aW1hdGVzfQpnZ3Bsb3QoYnlfZ3JvdXApICsgc2NhbGVfeF9jb250aW51b3VzKGxpbWl0cyA9IGMoNzAwLCAxNjAwKSkgKwogIHNjYWxlX3lfY29udGludW91cyhsaW1pdHMgPSBjKDcwMCwgMTYwMCkpICsgCiAgZ2VvbV9hYmxpbmUoZGF0YSA9IGJ5X2dyb3VwLCAKICAgICAgICAgICAgICBhZXMoc2xvcGUgPSBlc3Rfc3MxLCBpbnRlcmNlcHQgPSBlc3RfaW50ZXJjZXB0KSwgCiAgICAgICAgICAgICAgYWxwaGEgPSBJKDAuNSkpICsgdGhlbWVfYncoKQpgYGAKCiMjIyBXaGF0IHdlIGxlYXJuZWQgZnJvbSBwcmV2aW91cyBkaWFnbm9zdGljIGFuYWx5c2VzCgojSW4gb3VyIHByZXZpb3VzIG1vZHVsZSB3ZSBkZXRlcm1pbmVkIHRoYXQgYSBvbmUtc2l6ZS1maXRzLWFsbCBhcHByb2FjaCBsZWQgdG8gYSBtb2RlbCB0aGF0IAojbW9zdGx5IGlkZW50aWZpZWQgc2Nob29scyBpbiBsb3dlciBncmFkZXMsIGFuZCBtb3N0IG9mdGVuIGlkZW50aWZpZWQgc2Nob29scyAKI3dpdGggc21hbGwgY2xhc3Mgc2l6ZXMuCgojUmVzaWR1YWxzIG9mIDUwIG1vZGVscyBzaG93ZWQgbWlzc3BlY2lmaWNhdGlvbgoKI091dGxpZXJzIGFuZCBoaWdoIGxldmVyYWdlIHBvaW50cyBhcmUgaW5mbHVlbmNpbmcgZGF0YS4gV2Uga25vdyB3aGVuIGxvb2tpbmcgYXQgbWF0aCBjb21wYXJlZCB0byByZWFkaW5nLCBtYXRoIHNjb3JlcyBoYXZlIG1vcmUgb3V0bGllcnMgdGhhdCBhcmUgcHVsbGluZyB0aGUgc2xvcGUgb2Ygc3MxIGRvd24uIAojI1Bvc3NpYmx5IGFkZCBzdWJqZWN0IGFzIHZhcmlhYmxlCiMjUG9zc2libHkgdGVzdCBvdGhlciB2YXJpYWJsZXMsIGxpa2UgZ3JhZGUgbGV2ZWwsIHRvIGZpbmQgb3V0bGllcnMKCiNBbHNvIGxlYXJuZWQgdGhhdCBvdXRsaWVycyBleGlzdCBpbiBjb250ZXh0IG9mIGdyYWRlIGFuZCBsb2NhbGUuIFRoZXNlIGFsc28gbWlnaHQgYmUgZ29vZCB2YXJpYWJsZXMgdG8gaW5jbHVkZSBpbiBtb2RlbC4gQWxzbyBtYXkgd2FudCB0byB0ZXN0IHNvbWUgbW9yZSB1c2luZyBERklUUyBhbmQgQ29va3MuIAoKCiMjI05ldyBkaWFnbm9zdGljcy9tb2RlbCBjb21wYXJpc2lvbnMKCgojI1RFU1RJTkcgRk9SIE9VVExJRVJTIyMKCiNMb29rIGF0IGhpZ2ggbGV2ZXJhZ2UgcG9pbnRzIGZvciB2YXJpYWJsZXMtIGJvdGggdG8gaW5jbHVkZSBpbiA1MCBtb2RlbHMgYW5kIG1heWJlIHRvIGluY2x1ZGUgaW4gZ2xvYmFsIG1vZGVsIAojY3JlYXRlIG1vZGVsIGFuZCBhdWdtZW50ZWQgc2NvcmVzCmBgYHtyfQpsaWJyYXJ5KGJyb29tKSAjIFIgcGFja2FnZSBmb3Igd29ya2luZyB3aXRoIHJlZ3Jlc3Npb24gbW9kZWxzIHNtb290aGx5CiMgQ3JlYXRlIGEgbmV3IGRhdGFzZXQgd2l0aCBhdWdtZW50ZWQgdmFsdWVzCm0xIDwtIGxtKHNzMiB+IHNzMSArIHRlc3RfeWVhciwgZGF0YSA9IGZ1bGxfZGF0YSkKc2NoX3Njb3JlX20xIDwtIGF1Z21lbnQobTEpICMgZ2VuZXJhdGUgdGhlIGxldmVyYWdlcyBhbmQgbW9yZSBmb3IgZWFjaCByb3cgaW4gZGF0YQpuYW1lcyhzY2hfc2NvcmVfbTEpCmBgYAoKI2dlbmVyYXRlIGluZmx1ZW5jZSBzY29yZXMKYGBge3IgaW5mbHVlbmNlSGlzdG9ncmFtfQptMV9pbmYgPC0gaW5mbHVlbmNlLm1lYXN1cmVzKG0xKQpoZWFkKG0xX2luZiRpbmZtYXQpCmBgYAoKI2ZhY2V0IHdyYXAgYnkgdGVzdCB5ZWFyCmBgYHtyfQpwbG90ZGYgPC0gc2NoX3Njb3JlX20xCnBsb3RkZiRkZmJldGFfc3MxIDwtIG0xX2luZiRpbmZtYXRbLCAyXQoKZ2dwbG90KHBsb3RkZiwgYWVzKHggPSBzczEsIHkgPSBkZmJldGFfc3MxKSkgKyAKICBnZW9tX3BvaW50KGFscGhhPUkoMC4yKSkgKyAKICBnZW9tX2hsaW5lKHlpbnRlcmNlcHQgPSAyL3NxcnQobnJvdyhtMSRtb2RlbCkpKSArIAogIGdlb21faGxpbmUoeWludGVyY2VwdCA9IC0yL3NxcnQobnJvdyhtMSRtb2RlbCkpKSArIAogIGZhY2V0X3dyYXAofnRlc3RfeWVhcikgKyAKICBnZW9tX3J1ZyhhbHBoYT0xLzQsIHBvc2l0aW9uID0gImppdHRlciIpICsKICB0aGVtZV9idygpCgpgYGAKI3NvbWUgeWVhcnMgc2VlbSB0byBleGhpYml0IG91dGxpZXJzLCBlc3BlY2lhbGx5IGF0IGxvd2VyIHZhbHVlcyBvZiBzczEuIFRoZXNlIG91dGxpZXJzIHNlZW0gbW9yZSBsaWtlbHkgdG8gcHVsbCBkb3duIHRoZSBzbG9wZSBvZiBzczEuIFRoaXMgZG9lc24ndCBzZWVtIGFzIGRyYXN0aWMgYXMgb3RoZXIgdmFyaWFibGVzLgoKCiNmYWNldCB3cmFwIGJ5IGdyYWRlIGxldmVsCmBgYHtyfQptMSA8LSBsbShzczIgfiBzczEgKyBncmFkZSwgZGF0YSA9IGZ1bGxfZGF0YSkKc2NoX3Njb3JlX20xIDwtIGF1Z21lbnQobTEpICMgZ2VuZXJhdGUgdGhlIGxldmVyYWdlcyBhbmQgbW9yZSBmb3IgZWFjaCByb3cgaW4gZGF0YQpuYW1lcyhzY2hfc2NvcmVfbTEpCgptMV9pbmYgPC0gaW5mbHVlbmNlLm1lYXN1cmVzKG0xKQpoZWFkKG0xX2luZiRpbmZtYXQpCgpwbG90ZGYgPC0gc2NoX3Njb3JlX20xCnBsb3RkZiRkZmJldGFfc3MxIDwtIG0xX2luZiRpbmZtYXRbLCAyXQoKZ2dwbG90KHBsb3RkZiwgYWVzKHggPSBzczEsIHkgPSBkZmJldGFfc3MxKSkgKyAKICBnZW9tX3BvaW50KGFscGhhPUkoMC4yKSkgKyAKICBnZW9tX2hsaW5lKHlpbnRlcmNlcHQgPSAyL3NxcnQobnJvdyhtMSRtb2RlbCkpKSArIAogIGdlb21faGxpbmUoeWludGVyY2VwdCA9IC0yL3NxcnQobnJvdyhtMSRtb2RlbCkpKSArIAogIGZhY2V0X3dyYXAofmdyYWRlKSArIAogIGdlb21fcnVnKGFscGhhPTEvNCwgcG9zaXRpb24gPSAiaml0dGVyIikgKwogIHRoZW1lX2J3KCkKYGBgCiNsb3dlciBncmFkZSBsZXZlbHMgc2VlbSB0byBiZSBhZmZlY3RlZCBieSBvdXRsaWVycyBhdCBsb3dlciBsZXZlbHMgb2Ygc3MxIGJ5IHB1bGxpbmcgdGhlIHNsb3BlIGRvd24gd2hpbGUgaGlnZXIgZ3JhZGUgbGV2ZWxzIHNlZW0gdG8gYmUgYWZmZWN0ZWQgYnkgb3V0bGllcnMgYXQgbG93ZXIgbGV2ZWxzIG9mIHNzMSBieSBwdWxsaW5nIHRoZSBzbG9wZSB1cAoKCiNmYWNldCB3cmFwIGJ5IGxvY2FsZQpgYGB7cn0KbTEgPC0gbG0oc3MyIH4gc3MxICsgbG9jYWxlLCBkYXRhID0gZnVsbF9kYXRhKQpzY2hfc2NvcmVfbTEgPC0gYXVnbWVudChtMSkgIyBnZW5lcmF0ZSB0aGUgbGV2ZXJhZ2VzIGFuZCBtb3JlIGZvciBlYWNoIHJvdyBpbiBkYXRhCm5hbWVzKHNjaF9zY29yZV9tMSkKCm0xX2luZiA8LSBpbmZsdWVuY2UubWVhc3VyZXMobTEpCmhlYWQobTFfaW5mJGluZm1hdCkKCnBsb3RkZiA8LSBzY2hfc2NvcmVfbTEKcGxvdGRmJGRmYmV0YV9zczEgPC0gbTFfaW5mJGluZm1hdFssIDJdCgpnZ3Bsb3QocGxvdGRmLCBhZXMoeCA9IHNzMSwgeSA9IGRmYmV0YV9zczEpKSArIAogIGdlb21fcG9pbnQoYWxwaGE9SSgwLjIpKSArIAogIGdlb21faGxpbmUoeWludGVyY2VwdCA9IDIvc3FydChucm93KG0xJG1vZGVsKSkpICsgCiAgZ2VvbV9obGluZSh5aW50ZXJjZXB0ID0gLTIvc3FydChucm93KG0xJG1vZGVsKSkpICsgCiAgZmFjZXRfd3JhcCh+bG9jYWxlKSArIAogIGdlb21fcnVnKGFscGhhPTEvNCwgcG9zaXRpb24gPSAiaml0dGVyIikgKwogIHRoZW1lX2J3KCkKYGBgCiNsb29rcyBsaWtlIG91dGxpZXJzIGFsc28gZ3JlYXRseSBhZmZlY3Qgc3MxIGJ5IGxvY2FsZSBhcyB3ZWxsLiBUaGUgb3V0bGllcnMgYXJlIG11Y2ggbW9yZSBsaWtsZXkgdG8gYXBwZWFyIGluIGxvY2FsZSAxIChsYXJnZSBjaXRpZXMpIHdoZXJlIGxvd2VyIHZhbHVlcyBvZiBzczEgYWZmZWN0IHRoZSBzbG9wZSBvZiBzczEgYnkgcHVsbGluZyBpdCBkb3duCgoKI2ZhY2V0IHdyYXAgYnkgc2Nob29sIHNpemUKYGBge3J9Cm0xIDwtIGxtKHNzMiB+IHNzMSArIHNjaG9vbF9zaXplLCBkYXRhID0gZnVsbF9kYXRhKQpzY2hfc2NvcmVfbTEgPC0gYXVnbWVudChtMSkgIyBnZW5lcmF0ZSB0aGUgbGV2ZXJhZ2VzIGFuZCBtb3JlIGZvciBlYWNoIHJvdyBpbiBkYXRhCm5hbWVzKHNjaF9zY29yZV9tMSkKCm0xX2luZiA8LSBpbmZsdWVuY2UubWVhc3VyZXMobTEpCmhlYWQobTFfaW5mJGluZm1hdCkKCnBsb3RkZiA8LSBzY2hfc2NvcmVfbTEKcGxvdGRmJGRmYmV0YV9zczEgPC0gbTFfaW5mJGluZm1hdFssIDJdCgpnZ3Bsb3QocGxvdGRmLCBhZXMoeCA9IHNzMSwgeSA9IGRmYmV0YV9zczEpKSArIAogIGdlb21fcG9pbnQoYWxwaGE9SSgwLjIpKSArIAogIGdlb21faGxpbmUoeWludGVyY2VwdCA9IDIvc3FydChucm93KG0xJG1vZGVsKSkpICsgCiAgZ2VvbV9obGluZSh5aW50ZXJjZXB0ID0gLTIvc3FydChucm93KG0xJG1vZGVsKSkpICsgCiAgZmFjZXRfd3JhcCh+c2Nob29sX3NpemUpICsgCiAgZ2VvbV9ydWcoYWxwaGE9MS80LCBwb3NpdGlvbiA9ICJqaXR0ZXIiKSArCiAgdGhlbWVfYncoKQpgYGAKI2xvb2tzIGxpa2Ugc21hbGwgc2Nob29scyBhcmUgbW9zdCBzdWJqZWN0IHRvIG91dGxpZXJzIChhbHJlYWR5IGtuZXcgdGhhdCkKCiNmYWNldCB3cmFwIGJ5IGNoYXJ0ZXIKYGBge3J9Cm0xIDwtIGxtKHNzMiB+IHNzMSArIGNoYXJ0ZXJfaW5kLCBkYXRhID0gZnVsbF9kYXRhKQpzY2hfc2NvcmVfbTEgPC0gYXVnbWVudChtMSkgIyBnZW5lcmF0ZSB0aGUgbGV2ZXJhZ2VzIGFuZCBtb3JlIGZvciBlYWNoIHJvdyBpbiBkYXRhCm5hbWVzKHNjaF9zY29yZV9tMSkKCm0xX2luZiA8LSBpbmZsdWVuY2UubWVhc3VyZXMobTEpCmhlYWQobTFfaW5mJGluZm1hdCkKCnBsb3RkZiA8LSBzY2hfc2NvcmVfbTEKcGxvdGRmJGRmYmV0YV9zczEgPC0gbTFfaW5mJGluZm1hdFssIDJdCgpnZ3Bsb3QocGxvdGRmLCBhZXMoeCA9IHNzMSwgeSA9IGRmYmV0YV9zczEpKSArIAogIGdlb21fcG9pbnQoYWxwaGE9SSgwLjIpKSArIAogIGdlb21faGxpbmUoeWludGVyY2VwdCA9IDIvc3FydChucm93KG0xJG1vZGVsKSkpICsgCiAgZ2VvbV9obGluZSh5aW50ZXJjZXB0ID0gLTIvc3FydChucm93KG0xJG1vZGVsKSkpICsgCiAgZmFjZXRfd3JhcCh+Y2hhcnRlcl9pbmQpICsgCiAgZ2VvbV9ydWcoYWxwaGE9MS80LCBwb3NpdGlvbiA9ICJqaXR0ZXIiKSArCiAgdGhlbWVfYncoKQpgYGAKIyBjaGFydGVyIGRvZXNuJ3QgcmVhbGx5IHNlZW0gdG8gc2hvdyB0aGF0IG11Y2ggb2YgYW4gZWZmZWN0CgoKI2Jhc2VkIG9uIHRoZXNlIGFuYWx5c2lzLCBpdCBzZWVtcyBsaWtlIHRvIGhlbHAgY29udHJvbCBmb3Igb3V0bGllcnMsIGxvY2F0aW9uIGFuZCBncmFkZSBsZXZlbCBtdXN0IHNvbWVob3cgYmUgY29udHJvbGxlZCBmb3IgaW4gdGhlIG1vZGVsCgojIyNVU0lORyBGT1JNQUwgTU9ERUwgRElBR05PU1RJQ1MgVE8gU0hPVyBNT0RFTCBJTVBST1ZFTUVOVCMjIwoKI2V4cGxvcmUgYW5kIGNvbXBhcmUgcmVzaWR1YWwgcGxvdHMgdG8gc2VlIGlmIGJldHRlciBmaXQKCiNyZXNpdWFsIHBsb3QgNTAgb3JpZ2luYWwgbW9kZWxzCmBgYHtyIGdldHJlc2lkdWFsc2Zyb21lYWNobW9kZWx9Cmluc3RhbGwucGFja2FnZXMoImNvd3Bsb3QiKQpieV9ncm91cCA8LSBieV9ncm91cCAlPiUKICBtdXRhdGUoCiAgICByZXNpZHMgPSBtYXAyKGRhdGEsIG1vZGVsLCBhZGRfcmVzaWR1YWxzKSwKICAgIHByZWRzID0gbWFwMihkYXRhLCBtb2RlbCwgYWRkX3ByZWRpY3Rpb25zKQogICkKYnlfZ3JvdXAKCiMgdW5uZXN0IGV4cGFuZHMgdGhlIGRhdGEgb3V0IGZyb20gYSBjb2x1bW4KcGxvdGRmIDwtIGxlZnRfam9pbih1bm5lc3QoYnlfZ3JvdXAsIHJlc2lkcyksCiAgICAgICAgICAgICAgICAgICAgdW5uZXN0KGJ5X2dyb3VwLCBwcmVkcykpCgojIFJlc2lkdWFsIHBsb3QKCnBsb3RkZiAlPiUKICBnZ3Bsb3QoYWVzKHggPSBwcmVkLCB5ID0gcmVzaWQpKSArCiAgZ2VvbV9wb2ludChhbHBoYSA9IEkoMC4yKSkgKyB0aGVtZV9idygpICsKICBnZW9tX3Ntb290aCgpCgpgYGAKI20yIHJlc2lkdWFsIHBsb3Q9IGdyYWRlIGFuZCBkaXN0aWQKCmBgYHtyfQptMiA8LSBsbShzczIgfiBzczEgKyBncmFkZSArIGRpc3RpZCwgZGF0YSA9IGZ1bGxfZGF0YSkKCmZ1bGxfZGF0YSRwcmVkaWN0ZWRtMiA8LSBwcmVkaWN0KG0yKQpmdWxsX2RhdGEkcmVzaWR1YWxzbTIgPC0gcmVzaWR1YWxzKG0yKQoKZ2dwbG90KGRhdGE9ZnVsbF9kYXRhLCBhZXMoeCA9IHByZWRpY3RlZG0yLCB5ID0gcmVzaWR1YWxzbTIpKSArCmdlb21fcG9pbnQoYWxwaGEgPSBJKDAuMikpICsgdGhlbWVfYncoKSArCmdlb21fc21vb3RoKCkKCmBgYAoKI20zIHJlc2lkdWFsIHBsb3Q9IHN1YmplY3QgYW5kIGxvY2FsZQpgYGB7cn0KbTMgPC0gbG0oc3MyIH4gc3MxICsgc3ViamVjdCArIGxvY2FsZSwgbmEuYWN0aW9uPW5hLmV4Y2x1ZGUsIGRhdGEgPSBmdWxsX2RhdGEpCgpnZ3Bsb3QoZGF0YT1mdWxsX2RhdGEsIGFlcyh4ID0gcHJlZGljdChtMyksIHkgPSByZXNpZHVhbHMobTMpKSkgKwpnZW9tX3BvaW50KGFscGhhID0gSSgwLjIpKSArIHRoZW1lX2J3KCkgKwpnZW9tX3Ntb290aCgpCgpgYGAKCiNtNCByZXNpZHVhbCBwbG90PSBncmFkZSBhbmQgdGVzdCB5ZWFyCmBgYHtyfQptNCA8LSBsbShzczIgfiBzczEgKyBncmFkZSArIHRlc3RfeWVhciwgbmEuYWN0aW9uPW5hLmV4Y2x1ZGUsIGRhdGEgPSBmdWxsX2RhdGEpCgpnZ3Bsb3QoZGF0YT1mdWxsX2RhdGEsIGFlcyh4ID0gcHJlZGljdChtNCksIHkgPSByZXNpZHVhbHMobTQpKSkgKwpnZW9tX3BvaW50KGFscGhhID0gSSgwLjIpKSArIHRoZW1lX2J3KCkgKwpnZW9tX3Ntb290aCgpCgpgYGAKCiNtNSByZXNpZHVhbCBwbG90PSBncmFkZSBhbmQgbG9jYWxlCmBgYHtyfQptNSA8LSBsbShzczIgfiBzczEgKyBncmFkZSArIGxvY2FsZSwgbmEuYWN0aW9uPW5hLmV4Y2x1ZGUsIGRhdGEgPSBmdWxsX2RhdGEpCgpnZ3Bsb3QoZGF0YT1mdWxsX2RhdGEsIGFlcyh4ID0gcHJlZGljdChtNSksIHkgPSByZXNpZHVhbHMobTUpKSkgKwpnZW9tX3BvaW50KGFscGhhID0gSSgwLjIpKSArIHRoZW1lX2J3KCkgKwpnZW9tX3Ntb290aCgpCgpgYGAKCiNtNiByZXNpZHVhbCBwbG90PSBncmFkZSBhbmQgZnJsIHBlcgpgYGB7cn0KbTYgPC0gbG0oc3MyIH4gc3MxICsgZ3JhZGUgKyBmcmxfcGVyLCBuYS5hY3Rpb249bmEuZXhjbHVkZSwgZGF0YSA9IGZ1bGxfZGF0YSkKCmdncGxvdChkYXRhPWZ1bGxfZGF0YSwgYWVzKHggPSBwcmVkaWN0KG02KSwgeSA9IHJlc2lkdWFscyhtNikpKSArCmdlb21fcG9pbnQoYWxwaGEgPSBJKDAuMikpICsgdGhlbWVfYncoKSArCmdlb21fc21vb3RoKCkKCmBgYAoKI203IHJlc2lkdWFsIHBsb3Q9IGVucm9sbG1lbnQgYW5kIGdyYWRlIGxldmVsCmBgYHtyfQptNyA8LSBsbShzczIgfiBzczEgKyBlbnJvbGxtZW50ICsgZ3JhZGUsIG5hLmFjdGlvbj1uYS5leGNsdWRlLCBkYXRhID0gZnVsbF9kYXRhKQoKZ2dwbG90KGRhdGE9ZnVsbF9kYXRhLCBhZXMoeCA9IHByZWRpY3QobTcpLCB5ID0gcmVzaWR1YWxzKG03KSkpICsKZ2VvbV9wb2ludChhbHBoYSA9IEkoMC4yKSkgKyB0aGVtZV9idygpICsKZ2VvbV9zbW9vdGgoKQoKYGBgCgoKIyMjQUREIFZBUklBQkxFUyBUTyA1MCBORVNURUQgTU9ERUwgVE8gU0VFIElGIEJFVFRFUiBGSVQjIyMKCmBgYHtyfQpieV9ncm91cDEgPC0gZnVsbF9kYXRhWywgMToyOF0gJT4lCiAgZ3JvdXBfYnkoZ3JhZGUsIHN1YmplY3QsIHRlc3RfeWVhcikgJT4lCiAgbmVzdCgpCgpoZWFkKGJ5X2dyb3VwMSkKYGBgCgoKCiMjI0NPTVBBUkUgNTAgTkVTVEVEIE1PREVMIEFQUFJPQUNIIFRPIENPUExFWCBHTE9CQUwgTU9ERUwgRklUIFRPIEFMTCBUSEUgREFUQSMjIwoKI3RyeSBuZXcgZ2xvYmFsIG1vZGVscyB0byBjb21wYXJlIGluY2x1ZGluZyBuZXcgdmFyaWFibGVzICh0aGVzZSBzZWxlY3RlZCBiYyBvZiBwcmV2aW91cyBhbmQgYWJvdmUgYW5hbHlzaXMgYXMgaW5kaWNhdGluZyB0aGV5IG1heSBiZSBoaWdobHkgaW5mbHVlbnRpYWwgdmFyaWFibGVzKQoKI01vdmluZyBmb3J3YXJkIHdpdGggKG0yKSBkaXN0aWQgYW5kIGdyYWRlIGxldmVsIGFzIGFkZGVkIHRvIGFsdCBtb2RlbCwgYmFzZWQgb24gcmVzaWR1YWwgcGxvdHMgYW5kIHByZXZpb3VzIGFuYWx5c2lzCgojbG9vayBhdCBtb2RlbCBjb21wYXJpc29uIGJldHdlZW4gc2ltcGxlIGFuZCBhbHQgbW9kZWwKCmBgYHtyfQojIERlZmluZSBhIGdyb3VwZWQgZGF0YWZyYW1lIHVzaW5nIG9ubHkgdGhlIGNvbHVtbnMgd2UgbmVlZCAoMToxMCkKIyBncm91cCBieSB0aGUgZ3JhZGUsIHN1YmplY3QsIGFuZCB5ZWFyCiMgVGhlbiBuZXN0IHNvIHRoZSBkYXRhIGhhbmdzIHRvZ2V0aGVyIGluIG9uZSBsaXN0CmJ5X2dyb3VwIDwtIGZ1bGxfZGF0YVssIDE6MTBdICU+JQogIGdyb3VwX2J5KGdyYWRlLCBzdWJqZWN0LCB0ZXN0X3llYXIpICU+JQogIG5lc3QoKQoKIyBEZWZpbmUgYSBzaW1wbGUgZnVuY3Rpb24gdGhhdCBmaXRzIG91ciBtb2RlbApzaW1wbGVfbW9kZWwgPC0gZnVuY3Rpb24oZGYpIAogIGxtKHNzMiB+IHNzMSwgZGF0YSA9IGZ1bGxfZGF0YSkKCmFsdF9tb2RlbCA8LSBmdW5jdGlvbihkZikgCiAgbG0oc3MyIH4gc3MxICsgZGlzdGlkICsgZ3JhZGUsIGRhdGEgPSBmdWxsX2RhdGEpCgojIEFwcGx5IHRoYXQgbW9kZWwgdG8gZWFjaCBncm91cCBpbiBvdXIgZGF0YXNldCB1c2luZyB0aGUgYG1hcGAgZnVuY3Rpb24KIyBUaGlzIGlzIGVxdWl2YWxlbnQgdG8gYSBsb29wLCBidXQgbW9yZSBlZmZpY2llbnQgdG8gd3JpdGUKYnlfZ3JvdXAgPC0gYnlfZ3JvdXAgJT4lCiAgbXV0YXRlKGJhc2VfbW9kZWwgPSBtYXAoZGF0YSwgc2ltcGxlX21vZGVsKSwgCiAgICAgICAgIGZ1bGxfbW9kZWwgPSBtYXAoZGF0YSwgYWx0X21vZGVsKSkKCiMgQXBwbHkgYSBmdW5jdGlvbiB0aGF0IG1ha2VzIGFuIEYtdGVzdCBvZiBvdXIgdHdvIG1vZGVscyAoeCBhbmQgeSkKIyByZXR1cm5zIG9ubHkgdGhlIHN0YXRpc3RpY3MgZnJvbSB0aGUgYW5vdmEgb2JqZWN0IHdlIHdhbnQKIyBpbiB0aGlzIGNhc2UsIHRoZSBkaWZmZXJlbmNlIGluIHRoZSBzdW0gb2Ygc3F1YXJlcyBhbmQgdGhlIHAtdmFsdWUgb2YgdGhlIAojIEYtdGVzdAoKc2ltcGxlX2Fub3ZhIDwtIGZ1bmN0aW9uKHgsIHksIHN0YXQgPSBjKCJzcyIsICJwIiksIC4uLil7CiAgb3V0IDwtIGFub3ZhKHgsIHkpCiAgaWYoc3RhdCA9PSAic3MiKXsKICAgIHJldHVybihvdXQkYFN1bSBvZiBTcWBbWzJdXSkKICB9IGVsc2UgaWYoc3RhdCA9PSAicCIpewogICAgcmV0dXJuKG91dCRgUHIoPkYpYFtbMl1dKQogIH0KfQoKZnRlc3RzIDwtIGJ5X2dyb3VwICU+JSByb3d3aXNlKCkgJT4lCiAgICBkbyhzcyA9IHVubGlzdChzaW1wbGVfYW5vdmEoeCA9IC4kYmFzZV9tb2RlbCwgeSA9IC4kZnVsbF9tb2RlbCwgc3RhdCA9ICJzcyIpKSwgCiAgICAgIHB2YWwgPSB1bmxpc3Qoc2ltcGxlX2Fub3ZhKHggPSAuJGJhc2VfbW9kZWwsIHkgPSAuJGZ1bGxfbW9kZWwsIHN0YXQgPSAicCIpKSkKCmZ0ZXN0cyA8LSBhcHBseShmdGVzdHMsIDIsIHVubGlzdCkgIyBjb252ZXJ0IHRvIGEgbnVtZXJpYyBvYmplY3QKZnRlc3RzIDwtIGFzLmRhdGEuZnJhbWUoZnRlc3RzKSAjIG1ha2UgaW50byBhIGRhdGEuZnJhbWUgZm9yIGVhc2Ugb2YgdXNlCgp0YWJsZShmdGVzdHMkcHZhbCA8IDAuMDUpCmBgYAojdGhpcyBpcyB0ZWxsaW5nIG1lIHRoYXQgaW4gYWxsIDUwIGNhc2VzLCB0aGUgZnVsbGVyIGdsb2JhbCBtb2RlbCB3YXMgYSBtdWNoIGEgYmV0dGVyIGZpdCBvdmVyIHRoZSBzaW1wbGUsIHByb3Bvc2VkIDUwIG1vZGVscwoKI3RoZXJlZm9yZSwgSSByZWNvbW1lbmQgdXNpbmcgdGhlIGZvbGxvd2luZyBnbG9iYWwgbW9kZWwgaW4gcGxhY2Ugb2YgdGhlIDUwIG1vZGVsIGFwcHJvYWNoCiAgIyBzczIgfiBzczEgKyBkaXN0aWQgKyBncmFkZQoKCgoKCgoK